**Code to run to replicate India analysis, based on code sent by authors** April 4 2011


***SETUP
** Root folders stored as global macros. Both values end in a backslash so that a file name can be appended directly.
** d is the folder holding the raw DLHS3 survey files; it is used when the individual file is loaded in PART 1.
global d "F:\Natalie\Research\India Research\Surveys\DLHS_RCH3\"
** f is the project data folder. NOTE(review): f is not referenced in the code visible below; later paths are written out in full.
global f "F:\Natalie\PhD Health Policy\Classes\Gov 2001\Replication paper\Project\Data\"


********************************************************PART 1: from DLHS3 Individual code.do****************************************************
/* Code for gathering maternal characteristics and coverage information from the DLHS3 data */

/* Start from an empty session. */
clear
clear matrix
clear mata
clear all
set more off
/* The two memory requests are issued as separate commands, in the same way as in PART 2. */
/* (Written on a single line, "set mem 8192mset mem 1000m" is not a valid set command.) */
/* The second request supersedes the first. */
set mem 8192m
set mem 1000m
set maxvar 5000
pause on

/* Load the DLHS3 individual-level file from the folder stored in global d. */
use "${d}DLHS3IIND/DLHS3IIND.dta", clear

/* Sampling-frame flag: the DLHS 2 interviewed currently married women aged 15-44. */
/* Only DLHS3 women who would have been in that frame are retained, for comparability with the earlier survey. */

gen dlhstwo = 1 if (v105 == 1) & (v104 <= 44)
keep if dlhstwo == 1

/* obs is the running row number; it uniquely identifies the women kept in the sample. */

gen obs = _n

**pause

/* District identifiers.  distcode is a copy of dist, and district_wncode_dlhs3 (a copy of distcode) */
/* is the variable used to identify districts and to merge on the district-level codes and information. */

gen distcode = dist
gen district_wncode_dlhs3 = distcode

/* Sample weights for analysis at the national, state and district level. */

gen iweight = iewwt
gen sweight = sewwt
gen dweight = emwt
label var iweight "India women weight"
label var sweight "State women weight"
label var dweight "District women weight"

/* Survey round these records come from. */

gen dlhs = 3

/* Identifiers that, within a district, pin down the household the woman belongs to, and the woman herself. */

** source variables: htehsil snvq hhno
gen tehsilno = htehsil
gen villageno = snvq    /*118583 missing values - these are mostly urban */
gen householdno = hhno
gen psuno = psu
gen womenno = lineno

** Identification levels:
**   state and district : district_wncode_dlhs3
**   tehsil             : tehsilno
**   village            : villageno
**   household          : householdno
**   woman              : womenno

** Unique down to the household:
**   district_wncode_dlhs3 tehsilno villageno householdno

** Unique down to the woman:
**   district_wncode_dlhs3 tehsilno villageno householdno womenno

** Key for merging onto the district code book:
**   district_wncode_dlhs3

/* Basic characteristics of the woman: */

/* Age in completed years, copied from v104. */

gen age = v104
label var age "Mother's age in completed years"

/* CMC and year of interview. */
/* Events are dated both by calendar year and by century-month code (CMC), which resolves dates to the month: */
/*     CMC = 12*(year - 1900) + month */
/* so January 1900 has CMC 1 and March 2010 has CMC 1323. */

/* The CMC is left missing when the interview year or month is missing, or when the year is coded 8. */
gen cmcinterview = 12*(vyear - 1900) + vmonth if (vyear ~= .) & (vmonth ~= .) & (vyear ~= 8)   /*only 4 missings*/

/* Calendar year recovered from the CMC. */
gen surveyyear = floor((cmcinterview - 1)/12) + 1900 if cmcinterview ~= .

/* Year of last birth, as well as whether it was live or still: */
/* This section builds cmclastbirth (CMC of the most recent recorded event), monthsagolastbirth, */
/* and the hasbirth / livebirth flags used later to restrict the sample. */

/* yearlastbirth becomes the largest year found in the six v143b slots; max() skips missing values. */
/* NOTE(review): the slots are not filtered on outcome (v139) here, so the latest year may belong to a record */
/* that is not a live or still birth - confirm this is intended. */
gen int yearlastbirth = 0
replace yearlastbirth = max(v143b_1, v143b_2, v143b_3, v143b_4, v143b_5, v143b_6)   

**The code below starts cmclastbirth at January of yearlastbirth, and keeps a counter (num_kiddos) of the highest slot with data per observation.
gen cmclastbirth = .
replace cmclastbirth = (12*(yearlastbirth - 1900) + 1)

** monthlastbirth is created as 6 and is never updated below (the updating lines inside the loop are commented out).
gen int monthlastbirth = 6
gen num_kiddos = .

forvalues i = 1/6 {

	** Raise cmclastbirth to (yearlastbirth, month of slot i) whenever that is at least the current value.
	** NOTE(review): the month of slot i is combined with yearlastbirth even when slot i has an earlier year (v143b is not checked), so the result can be a month in which no event was recorded. Confirm against the authors before changing, since the sample size depends on it.
	replace cmclastbirth = (12*(yearlastbirth - 1900) + v143a_`i') if (((12*(yearlastbirth - 1900) + v143a_`i') >= cmclastbirth)&(v143a_`i' ~= .))

	** replace yearlastbirth = v143b_`i' if ((v143a_`i' ~= .)&(v143b_`i' ~= .)&(v143b_`i' > yearlastbirth))
	** replace monthlastbirth = v143a_`i' if ((v143a_`i' ~= .)&(v143b_`i' ~= .)&(v143b_`i' > yearlastbirth))
	
	** replace yearlastbirth = v143b_`i' if ((v143a_`i' ~= .)&(v143b_`i' ~= .)&(v143b_`i' == yearlastbirth)&(v143a_`i' > monthlastbirth))
	** replace monthlastbirth = v143a_`i' if ((v143a_`i' ~= .)&(v143b_`i' ~= .)&(v143b_`i' == yearlastbirth)&(v143a_`i' > monthlastbirth))
	
	** num_kiddos ends up as the highest slot number whose v138 entry is non-missing.
	replace num_kiddos = `i' if (v138_`i' ~= .)

}

** pause

** NOTE(review): the second line below runs after yearlastbirth == 0 has already been set to missing, so its condition can no longer be true.
replace yearlastbirth = . if (yearlastbirth == 0)
replace monthlastbirth = . if (yearlastbirth == 0)

** replace cmclastbirth = (12*(yearlastbirth - 1900) + monthlastbirth) if ((yearlastbirth ~= .)&(monthlastbirth ~= .))

/* Months elapsed between the last recorded event and the interview. */
gen monthsagolastbirth = .
replace monthsagolastbirth = (cmcinterview - cmclastbirth) if ((cmcinterview ~= .)&(cmclastbirth ~= .))

**The code below keeps track of any births (live+still) and live births only for the most recent birth
** A slot counts only when its own CMC, built from v143b and v143a, equals cmclastbirth exactly.
gen hasbirth = .
gen livebirth = .

forvalues j = 1/6 {

	replace hasbirth = 1 if (((v139_`j' == 1)|(v139_`j' == 2))&((12*(v143b_`j' - 1900) + v143a_`j') == cmclastbirth))
	replace livebirth = 1 if ((v139_`j' == 1)&((12*(v143b_`j' - 1900) + v143a_`j') == cmclastbirth))

}

/* Additional characteristics of the woman: */

/* Parity/number of births, from v134. */
/* Note: a recorded value of 0 is turned into missing, as in the authors' code. */

gen nb = v134 if v134 ~= 0
label var nb "Parity"

/* Years of education: v113 when it is at most 30, and 0 whenever v112 equals 2. */

gen yrsed = v113 if v113 <= 30
replace yrsed = 0 if v112 == 2
label var yrsed "Years of Education"


/* Rural residence: 1 when htype is 1, 0 when htype is 2, missing otherwise. */

gen rur = (htype == 1) if inlist(htype, 1, 2)
label var rur "Rural Residence"

/* Coverage with interventions: */

/* JSY: 1 when v239 is 1, 0 when v239 is 2, missing otherwise. */

gen jsy = (v239 == 1) if inlist(v239, 1, 2)
label var jsy "Received JSY/Financial Assistance"

/* ANC1: 1 when v206 is 1, 0 when v206 is 2, missing otherwise. */

gen anc1 = (v206 == 1) if inlist(v206, 1, 2)
label var anc1 "ANC1 Coverage"

/* ANC3: 1 when v208 is at least 3; 0 when v208 is below 3 or when anc1 is 0. */

gen anc3 = 1 if (v208 >= 3) & (v208 ~= .)
replace anc3 = 0 if (v208 < 3) | (anc1 == 0)
label var anc3 "ANC3 Coverage"


/* In-facility birth: v230 codes up to 10 count as in-facility, codes 11-14 as not. */

gen ifb = 1 if v230 <= 10
replace ifb = 0 if inlist(v230, 11, 12, 13, 14)
label var ifb "In-Facility Birth"


/* Skilled birth attendance, from v231. */
/* alt_sba counts codes 1-3 as skilled; sba counts only codes 1-2. */
/* Both indicators are set to 0 for every in-facility birth (ifb == 1). */

gen alt_sba = 1 if inlist(v231, 1, 2, 3)
replace alt_sba = 0 if inlist(v231, 4, 5, 7) | (ifb == 1)

gen sba = 1 if inlist(v231, 1, 2)
replace sba = 0 if inlist(v231, 3, 4, 5, 7) | (ifb == 1)
label var sba "Skilled Birth Attendance"

sort district_wncode_dlhs3 tehsilno villageno householdno

/* Keep only the women for whom a live or still birth was matched to cmclastbirth. */
keep if hasbirth == 1
count /*NC: Now at 187355 obs --> this has already restricted births to most recent birth only*/

/* The analysis is restricted to births since January 1, 2004 (CMC 1249).  Most DLHS3 interviews took place in 2008. */
/* The questionnaire is only supposed to collect births in that window; this line removes any earlier ones to be sure. */

keep if cmclastbirth >= 1249  /*NC: Still at 187354 obs --> This code restricts to births more recent than Jan 1 2004*/


/* Variables carried forward to the maternal dataset. */
keep obs district_wncode_dlhs3 state dist dlhstwo ///
	tehsilno villageno psuno householdno womenno ///
	iweight sweight dweight distcode dlhs ///
	age cmcinterview surveyyear ///
	yearlastbirth cmclastbirth monthsagolastbirth livebirth hasbirth ///
	nb yrsed rur jsy anc1 anc3 ifb sba alt_sba ///
	v231 v226e fac1_1 htehsil snvq v236 v237 v238

compress

save "F:\Natalie\PhD Health Policy\Classes\Gov 2001\Replication paper\Project\Data\Maternal Datasets\DLHS3 Data v2.dta", replace

*********************************************************************************PART 2: from DLHS3 Mortality Code.do**************************************************

/* Gathers information on births and their outcomes from the DLHS3. */

/* Empty the session; clear all also removes the data, matrices and Mata objects. */
clear all
set more off
set mem 8192m
set mem 1000m
set maxvar 5000
pause on

/* Reload the DLHS3 individual-level file. */
use "F:\Natalie\Research\India Research\Surveys\DLHS_RCH3\DLHS3IIND\DLHS3IIND.dta", clear

/* Sampling-frame flag: the DLHS 2 interviewed currently married women aged 15-44. */
/* Only DLHS3 women who would have been in that frame are retained, for comparability with the earlier survey. */

gen dlhstwo = 1 if (v105 == 1) & (v104 <= 44)
keep if dlhstwo == 1

/* obs is the running row number; it uniquely identifies mothers. */

gen obs = _n

/* District identifiers.  distcode is a copy of dist, and district_wncode_dlhs3 (a copy of distcode) */
/* is the variable used to identify districts and to merge on the district-level codes and information. */

gen distcode = dist
gen district_wncode_dlhs3 = distcode

/* Sample weights for analysis at the national, state and district level. */

gen iweight = iewwt
gen sweight = sewwt
gen dweight = emwt

/* Identifiers that, within a district, pin down the household the woman belongs to, and the woman herself. */

** source variables: htehsil snvq hhno
gen tehsilno = htehsil
gen villageno = snvq
gen householdno = hhno
gen psuno = psu
gen womenno = lineno

** Identification levels:
**   state and district : district_wncode_dlhs3
**   tehsil             : tehsilno
**   village            : villageno
**   household          : householdno
**   woman              : womenno

** Unique down to the household:
**   district_wncode_dlhs3 tehsilno villageno householdno

** Unique down to the woman:
**   district_wncode_dlhs3 tehsilno villageno householdno womenno

** Key for merging onto the district code book:
**   district_wncode_dlhs3

/* CMC of interview; left missing when the year or month is missing, or when the year is coded 8. */

gen cmcinterview = 12*(vyear - 1900) + vmonth if (vyear ~= .) & (vmonth ~= .) & (vyear ~= 8)

/* Reshape from one row per mother to one row per recorded pregnancy. */

** Per-pregnancy variables (# is the slot number):
**   v138_#  = "pregnancy index," generally seems to be equal to # itself
**   v139_#  = outcome of the pregnancy: 1 => live birth; 2 => still birth; 3 => induced abortion; 4 => spontaneous abortion
**   v141_#  = whether it was a single or multiple birth
**   v141a_# = birth order, generally equal to # although not always
**   v143a_# = month of the birth or abortion
**   v143b_# = year of the birth or abortion
**   v145_#  = whether or not the child is alive: 1 => alive, 2 => dead
**   v146_#  = age at time of death, recorded in a three-digit format 100-130, 201-223, 300-305, 999

/* Drop everything except the identifiers, weights and the per-pregnancy variables. */
keep district_wncode_dlhs3 psuno villageno householdno womenno ///
	cmcinterview obs iweight sweight dweight ///
	v138_* v139_* v141_* v141a_* v143a_* v143b_* v145_* v146_*

*pause 

/* After the reshape each row is a pregnancy slot (numbered by index) instead of a mother. */

reshape long v138_ v139_ v141_ v141a_ v143a_ v143b_ v145_ v146_, ///
	i(district_wncode_dlhs3 psuno villageno householdno womenno cmcinterview obs iweight sweight dweight) ///
	j(index)

** merge m:m district_wncode_dlhs3 using "J:\Project\Coverage\In-Facility Birth Project\Code Book.dta"

** drop _merge

/* Keep live births and still births only. */
keep if inlist(v139_, 1, 2)


/* Year of the survey, recovered from the interview CMC. */

gen surveyyear = floor((cmcinterview - 1)/12) + 1900 if cmcinterview ~= .

/* Survey round these observations come from. */

gen dlhs = 3

/* CMC and year of birth; see the comments in "DLHS3 Individual Code" for an explanation of the CMC date coding. */

gen cmcbirth = 12*(v143b_ - 1900) + v143a_ if (v143b_ ~= .) & (v143a_ ~= .)
label var cmcbirth "CMC of birth of child"

gen yearbirth = floor((cmcbirth - 1)/12) + 1900 if cmcbirth ~= .
label var yearbirth "Year of birth of child"


/* Implied age of the child in months: interview CMC minus birth CMC. */

gen cmcage = cmcinterview - cmcbirth if (cmcinterview ~= .) & (cmcbirth ~= .)
label var cmcage "Months between birth and interview"

/* birth_index is created empty here and filled in after the births are merged with the mothers. */
gen birth_index = .
label var birth_index "1 => most recent birth for mother"

/* Single birth or multiple (twins, triplets, etc.): v141_ of 2 is a multiple birth, 1 a single birth. */

gen mult_birth = (v141_ == 2) if inlist(v141_, 1, 2)
label var mult_birth "birth was part of a multiple birth"

/* Survival status of the child: v145_ of 2 means dead, 1 means alive. */

gen died = (v145_ == 2) if inlist(v145_, 1, 2)

/* alive is derived from died before still births are folded into died below. */
gen alive = (died == 0) if died ~= .

/* Still birth indicator; still births are also counted as died. */

gen sb = (v139_ == 2) if inlist(v139_, 1, 2)
replace died = 1 if sb == 1
label var sb "Still birth"

/* Age of the child at death: b08_ is the unit code, taken from the hundreds digit of v146_... */

gen b08_ = 1 if inrange(v146_, 100, 130)
replace b08_ = 2 if inrange(v146_, 201, 223)
replace b08_ = 3 if inrange(v146_, 300, 305)
label var b08_ "Units of age at death"

/* ...and b09_ is how many of those units had passed (the remainder of v146_). */

gen b09_ = v146_ - b08_*100 if (v146_ ~= .) & (b08_ ~= .)
label var b09_ "Age at death"


/* Early Neonatal Mortality */
/* The three mortality indicators below are built with chains of replace statements; later lines override */
/* earlier ones, so the order matters.  In each chain still births end up missing and surviving children end up 0. */
/* b08_ is the unit code of the age at death and b09_ the number of units (presumably 1 = days, 2 = months, 3 = years - TODO confirm against the questionnaire). */
gen enm = .
/* Death at 7 units or fewer when the unit code is 1. */
replace enm = 1 if ((died == 1)&(b08_ == 1)&(b09_ <= 7))
**NC: I think the code below should be:  replace enm = 0 if ((died == 1)&(b08_ == 1)&(b09_ > 7))!!! Have changed this from  replace enm = 0 if ((died == 1)&(b09_ == 1)&(b09_ > 7))
replace enm = 0 if ((died == 1)&(b08_ == 1)&(b09_ > 7))
/* I have a *few* questions about this next line of code, see the comments in the neonatal mortality code lines. */
/* Deaths recorded with unit code 2 or 3 are never early neonatal. */
replace enm = 0 if ((died == 1)&((b08_ == 2)|(b08_ == 3)))
replace enm = . if (sb == 1)
replace enm = 0 if (died == 0)
label var enm "Early neonatal mortality"


/* Neonatal Mortality */
gen nnm = .
/* Every early neonatal death is a neonatal death. */
replace nnm = 1 if (enm == 1)
/* NOTE(review): with unit code 1 the cut-off used here is 31, and with unit code 2 a value of exactly 1 also counts; confirm these thresholds against the authors' definition. */
replace nnm = 1 if ((died == 1)&(b08_ == 1)&(b09_ <= 31))
replace nnm = 1 if ((died == 1)&(b08_ == 2)&(b09_ == 1))
replace nnm = 0 if ((died == 1)&(b08_ == 2)&(b09_ > 1))
/* I actually question this next line of code because there are some cases where the age of death is supposedly recorded in years, */
/* but it seems more plausible if it is in fact months. */
replace nnm = 0 if ((died == 1)&(b08_ == 3))
replace nnm = . if (sb == 1)
replace nnm = 0 if (died == 0)
label var nnm "Neonatal mortality"

/* Execution stops here while pause is on; it has to be resumed interactively. */
pause  

/* Infant Mortality */
gen imr = .
/* Every neonatal death is an infant death. */
replace imr = 1 if (nnm == 1)
/* Unit code 2 with at most 12 units, or unit code 3 with exactly 1 unit, also counts as an infant death. */
replace imr = 1 if ((died == 1)&(b08_ == 2)&(b09_ <= 12))
replace imr = 1 if ((died == 1)&(b08_ == 3)&(b09_ == 1))
replace imr = 0 if ((died == 1)&(b08_ == 2)&(b09_ > 12))
replace imr = 0 if ((died == 1)&(b08_ == 3)&(b09_ > 1))
replace imr = . if (sb == 1)
replace imr = 0 if (died == 0)
label var imr "Infant mortality"


/* The analysis is meant to be restricted to most-recent births since January 1, 2004 (CMC 1249). */
/* This step applies only the date cut-off; rows with a missing cmcbirth also pass it, because missing sorts above every number. */

**count of vars pre doing this is 283338, no obs deleted doing the line of code below.
keep if cmcbirth >= 1249

/* Variables carried forward to the births dataset. */
keep district_wncode_dlhs3 psuno villageno householdno ///
	iweight sweight dweight obs ///
	cmcinterview dlhs surveyyear ///
	cmcbirth cmcage died alive enm nnm imr ///
	b08_ b09_ sb birth_index yearbirth mult_birth

compress

save "F:\Natalie\PhD Health Policy\Classes\Gov 2001\Replication paper\Project\Data\Births Datasets\DLHS3 Data.dta", replace



*********************************************************************************PART 3: from DLHS3 Household Code.do**************************************************

/* Merges household-level covariates onto the DLHS 3 coverage/maternal characteristics data. */

/* Empty the session; clear all also removes the data, matrices and Mata objects. */
clear all
set more off
set mem 1500m
set maxvar 7000
pause on

/* Load the DLHS3 household file. */
use "F:\Natalie\Research\India Research\Surveys\DLHS_RCH3\DLHS3HIND\DLHS3HIND.dta" 

/* District key, used to identify districts and to merge on the district-level codes and information. */

gen district_wncode_dlhs3 = dist

/* Identifiers that narrow it down to the household: */

gen tehsilno = htehsil
gen villageno = snvq
gen householdno = hhno
gen psuno = psu

** gen cmcinterview = .
** replace cmcinterview = (12*(hyear - 1900) + hmonth) if ((hmonth ~= .)&(hyear ~= .))

*duplicates report district_wncode_dlhs3 tehsilno villageno householdno psuno

*Caste: one 0/1 dummy for each of codes 1-3 of hv116b, and a fourth dummy for everything from 4 upwards
forvalues k = 1/3 {
	gen caste_`k' = (hv116b == `k')
}
gen caste_4 = (hv116b >= 4)  /*other AND none AND missing*/

*Collapse the four dummies into a single categorical caste variable
gen caste = .
forvalues k = 1/4 {
	replace caste = `k' if caste_`k' == 1
}

**Religion: hv115 is renamed, and every code from 6 upwards is pooled into 6

rename hv115 religion
replace religion = 6 if religion >= 6   /*other*/

**BPL card: hv134 is renamed

rename hv134 bpl_card

/* Keep the merge keys, the new covariates and the raw identifiers. */
keep district_wncode_dlhs3 tehsilno villageno householdno psuno ///
	caste religion bpl_card ///
	hhno psu htehsil snvq

/* Attach the household covariates to the maternal dataset: one household row can match many women. */
merge 1:m district_wncode_dlhs3 tehsilno villageno householdno psuno using "F:\Natalie\PhD Health Policy\Classes\Gov 2001\Replication paper\Project\Data\Maternal Datasets\DLHS3 Data v2.dta" 

/* Only rows found in both files are kept. */
keep if _merge == 3

drop _merge

count  /*NC: 187354 */

/* Check obs for repeats, then keep a single row per obs. */
duplicates report obs

duplicates drop obs, force

compress

*Wealth deciles and quintiles built from fac1_1

xtile dlhs3_country_decile2 = fac1_1, nquantiles(10)
xtile dlhs3_country_quintile2 = fac1_1, nquantiles(5)


*********Intermediate Piece before running Permanent Income stuff *******

**Merge in the household data with the snhq var
** Every key variable is written out in full (htehsil, not a shortened form), so the merge does not depend on
** variable-name abbreviation being switched on and cannot silently pick up a different variable.
sort state dist psu hhno htehsil snvq
merge m:1 state dist psu hhno htehsil snvq using "F:\Natalie\PhD Health Policy\Classes\Gov 2001\Replication paper\Project\Data\DLHS3_HH_for_merge.dta"

** Keep only the rows present in both files (drops using-only and master-only rows).
keep if _merge == 3
drop _merge


*Before doing the permanent income stuff below, need the HHID var
** hhid is the string concatenation of the identifiers below; it is the key for the permanent-income merge.
egen hhid = concat(state dist psu hhno htehsil psupop snvq snhq dlhs)

save "F:\Natalie\PhD Health Policy\Classes\Gov 2001\Replication paper\Project\Data\Maternal Datasets\DLHS3 Data v2.dta", replace


******************PERMANENT INCOME STUFF************************

/* Attach the permanent income (PI) data supplied by the authors to the maternal dataset. */

clear
clear all
pause on

use "F:\Natalie\PhD Health Policy\Classes\Gov 2001\Replication paper\Files from authors\PI data\DLHS2_3_PI_full"

/* Execution stops here while pause is on; it has to be resumed interactively. */
pause

/* The PI file holds more than one round; only the DLHS 3 rows are used. */
keep if dlhs == 3

/* District key, used to identify districts and to merge on the district-level codes and information. */

gen district_wncode_dlhs3 = district_hhcode_dlhs3



/* These narrow it down to the level of household: */
**we can't run the lines below b/c these vars are missing from the PI datasets 
/*
//gen tehsilno = htehsil
//gen villageno = snvq
//gen householdno = hhno
//gen psuno = psu

//duplicates report district_wncode_dlhs3 tehsilno villageno householdno psuno

//keep hhid district_wncode_dlhs3 tehsilno villageno householdno psuno hh_p_income dlhs3_country_quintile dlhs3_country_decile dlhs2_country_quintile dlhs2_country_decile full_country_quintile full_country_decile urban female age_years literate education hv134 hhwt ihhwt shhwt educlevel DIST_* 

*merge 1:m district_wncode_dlhs3 tehsilno villageno householdno psuno using "J:\Project\Coverage\In-Facility Birth Project\Maternal Datasets\DLHS3 Data.dta" 
*merge 1:m district_wncode_dlhs3 tehsilno villageno householdno psuno using "F:\Natalie\PhD Health Policy\Classes\Gov 2001\Replication paper\Project\Data\Maternal Datasets\DLHS3 Data.dta" 
*/


/* Households are matched on hhid instead; one PI row can match many women. */
merge 1:m hhid using "F:\Natalie\PhD Health Policy\Classes\Gov 2001\Replication paper\Project\Data\Maternal Datasets\DLHS3 Data v2.dta" 

/* Only rows found in both files are kept. */
keep if _merge == 3

drop _merge

/* Check obs for repeats, then keep a single row per obs. */
duplicates report obs

duplicates drop obs, force

compress

save "F:\Natalie\PhD Health Policy\Classes\Gov 2001\Replication paper\Project\Data\Maternal Datasets\DLHS3 Data v2.dta" , replace


***************************************VILLAGE LEVEL**********************************
/* Gathers data from the DLHS3 Village-Level Questionnaire and merges it onto the coverage/maternal characteristics data. */

/* Empty the session; clear all also removes the data, matrices and Mata objects. */
clear all
set more off
set mem 800m
set maxvar 5000
pause on

/* Load the DLHS3 village file. */
use "F:\Natalie\Research\India Research\Surveys\DLHS_RCH3\DLHS3VIND\DLHS3VIND.dta", clear

/* District identifiers.  distcode is a copy of dist, and district_wncode_dlhs3 (a copy of distcode) */
/* is the variable used to identify districts and to merge on the district-level codes and information. */

gen distcode = dist
gen district_wncode_dlhs3 = distcode

/* Identifiers down to the level of the village. */

gen villageno = slvq
gen psuno = vpsu

/* district_wncode_dlhs3 and psuno together are expected to identify the rows uniquely; the report below checks this. */

duplicates report district_wncode_dlhs3 psuno

/* Execution stops here while pause is on; it has to be resumed interactively. */
pause

/* Distance to facilities */
/* In the v116 items used below, the variable ending in 1 says whether the facility is available in the village */
/* (1 = yes, 2 = no) and the variable ending in 2 holds the distance used when it is not. */

gen distfac = .
label var distfac "Distance to nearest medical facility (kilometers)"

/* Distance to Government/District(/Municipal) Hospital: */

gen distgmh = .
replace distgmh = 0 if (v116f1 == 1)
replace distgmh = v116f2 if ((v116f1 == 2)&(v116f2 ~= .))
label var distgmh "Distance to nearest govt/municipal/dist Hospital (kilometers)"

/* Distance to CHC/Rural Hospital */

gen distcrh = .
replace distcrh = 0 if (v116e1 == 1)
replace distcrh = v116e2 if ((v116e1 == 2)&(v116e2 ~= .))
label var distcrh "Distance to nearest CHC/rural hospital (kilometers)"

/* Distance to other government facility */
/* Zero when any of the four facility types is in the village.  Otherwise, when all four are reported as not */
/* in the village, the smallest recorded distance is used (min() skips missing distances). */
/* The availability test uses the variables ending in 1 and the non-missing test uses the distances ending in 2, */
/* which is the same pattern as the private-facility block below. */
/* NOTE(review): a village with any of the four availability answers missing keeps a missing distogf; confirm that is wanted. */

gen distogf = . 
replace distogf = 0 if ((v116c1 == 1)|(v116d1 == 1)|(v116b1 == 1)|(v116g1 == 1))
replace distogf = min(v116c2, v116d2, v116b2, v116g2) if ((v116c1 == 2)&(v116d1 == 2)&(v116b1 == 2)&(v116g1 == 2)&((v116c2 ~= .)|(v116d2 ~= .)|(v116b2 ~= .)|(v116g2 ~= .)))
label var distogf "Distance to nearest other govt facility (kilometers)"

/* Distance to NGO/Trust Hospital or Clinic */
/* Created empty; nothing in this file fills it in. */

gen distnthc = .
label var distnthc "Distance to nearest NGO/trust hospital (kilometers)"

/* Distance to Private facility of some sort */

gen distpf = .
replace distpf = 0 if ((v116h1 == 1)|(v116i1 == 1)) 
replace distpf = min(v116h2, v116i2) if ((v116h1 == 2)&(v116i1 == 2)&((v116h2 ~= .)|(v116i2 ~= .))) 
label var distpf "Distance to nearest private facility (kilometers)"

/* Distance to any sort of private facility or NGO/Trust facility: */
/* Currently just a copy of distpf, since distnthc is empty. */

gen distpn = .
replace distpn = distpf
label var distpn "Distance to nearest NGO/trust or private facility (kilometers)"

/* Overall distance: the smallest of the facility-specific distances, when at least one of them is known. */
replace distfac = min(distgmh, distcrh, distogf, distnthc, distpf) if ((distgmh ~= .)|(distcrh ~= .)|(distogf ~= .)|(distnthc ~= .)|(distpf ~= .))


/* Village Participation in JSY */

**********SR Note: i don't think we use these variables? we only have individual level JSY?******************

/* ijsy: 1 when v135aa is 1, 0 when v135aa is 2, missing otherwise. */
gen ijsy = (v135aa == 1) if inlist(v135aa, 1, 2)
label var ijsy "Village has implemented JSY"

/* bjsy: 1 when v135ab is 1, 0 when v135ab is 2, missing otherwise. */
gen bjsy = (v135ab == 1) if inlist(v135ab, 1, 2)
label var bjsy "Beneficiaries of JSY in village"

/* Merge the village information onto the maternal data. */
** gmh crh ogf nthc pf pn
keep district_wncode_dlhs3 villageno psuno ///
	distfac distgmh distcrh distogf distnthc distpf distpn ///
	ijsy bjsy

merge 1:m district_wncode_dlhs3 psuno using "F:\Natalie\PhD Health Policy\Classes\Gov 2001\Replication paper\Project\Data\Maternal Datasets\DLHS3 Data v2.dta"

/* Village rows with no woman attached are removed; women with no village row are kept. */
keep if _merge != 1

drop _merge

compress

save "F:\Natalie\PhD Health Policy\Classes\Gov 2001\Replication paper\Project\Data\Maternal Datasets\DLHS3 Data v2.dta", replace



*************************************MERGING************************
/* Code for merging together DLHS3 births and mothers */
/* The reload block below is commented out, so the merge works on the maternal data left in memory by the village step above. */

/*
clear
clear matrix
clear mata
clear all
set more off
set mem 700m
set maxvar 7000
pause on

/* Not sure I'll need the country code thing here... */
** do "J:\Usable\Common Indicators\Country Codes\countrycode.ado"

use "F:\Natalie\PhD Health Policy\Classes\Gov 2001\Replication paper\Project\Data\Maternal Datasets\DLHS3 Data v2.dta" , clear
*/

/* One mother row (obs) can match several birth rows. */
merge 1:m obs using "F:\Natalie\PhD Health Policy\Classes\Gov 2001\Replication paper\Project\Data\Births Datasets\DLHS3 Data.dta" 
/*59 217 from using not matched */

/* Keep only births whose mother is in the maternal data, and mothers with at least one birth row. */
drop if _merge != 3

/* Within each mother, order births from most recent to oldest. */
gsort + obs - yearbirth - cmcbirth

/* birth_index numbers the births of each mother in that order, so 1 is meant to be the most recent birth. */
/* NOTE(review): the numbering, and the _n+1 references below, rely on the descending order from gsort still being in place */
/* when the bysort obs prefix runs; confirm, or spell the order out in the by-group. */
bysort obs: replace birth_index = _n
/* NOTE(review): this marks every birth other than the most recent one as a multiple birth whenever age is non-missing; confirm this is intended. */
replace mult_birth = 1 if (age ~= .)&(birth_index ~= 1)&(mult_birth ~= 1)
** recode birth_index (1=1) (2/max=0) (else=.), generate(mrb)


/* Interval in months between a birth and the next row of the same mother (the preceding birth in time). */
gen birth_interval = .
bysort obs: replace birth_interval = (cmcbirth[_n] - cmcbirth[_n+1])
/* When the interval is 0 and both rows are flagged as multiple births, take the interval of the next row instead. */
bysort obs: replace birth_interval = (birth_interval[_n+1]) if (birth_interval[_n]==0 & mult_birth[_n]==1 & mult_birth[_n+1]==1)

** replace birth_interval = . if ((birth_interval > 0)&(birth_interval < 9))

/* From here on there is one row per mother: her first-ranked birth. */
keep if birth_index == 1

/* Group the interval into three bands; any value of 11 or below, including zero or negative values, goes to band 1. */
recode birth_interval (min/11 = 1) (12/23 = 2) (24/max = 3)
/* A missing interval is put in the longest band when the child is at most 24 months old, and first births (nb == 1) always are. */
replace birth_interval = 3 if (birth_interval == .)&(cmcage <= 24)
replace birth_interval = 3 if (nb == 1)
label define birth_interval 1 "0-11 Months" 2 "12-23 Months" 3 "24<= Months"
label values birth_interval birth_interval

/* Execution stops at each pause while pause is on; it has to be resumed interactively. */
pause
drop _merge
pause

*The district merge file needs state as a string: make a string copy, keep the numeric original as state2, and give the copy the name state
tostring state, generate(state1)
rename state state2
rename state1 state

*Attach the district-level information; many women share one district row
merge m:1 district_wncode_dlhs3 using "F:\Natalie\PhD Health Policy\Classes\Gov 2001\Replication paper\Project\Data\DLHS3 District Merge.dta"

*Only rows found in both files are kept
keep if _merge == 3   /*no obs dropped */
drop _merge

/* Categorical versions of continuous variables */

/* Education: 0 years, 1-5, 6-11, and 12 or more years. */

*rename educlevel hheduc_cat
*label variable hheduc_cat "HH head education"
recode yrsed (0=0) (1/5=1) (6/11=2) (12/max=3), generate(mateduc_cat)
label variable mateduc_cat "Maternal education"
/* The value label educlevel is not defined in this file; it is expected to arrive with the merged data. */
label values mateduc_cat educlevel

/* Parity: */
/* The rules are written without overlap.  With rules of 3/5 and 5/max, a parity of 5 is claimed by the first rule */
/* and lands in category 3, so the top category really starts at 6; the label now says so. */
/* NOTE(review): if the intended grouping was 3-4 and 5 or more, change the rules to (3/4=3) (5/max=4) and both labels instead. */

recode nb (0=.) (1=1) (2=2) (3/5=3) (6/max=4), generate(nb_cat)
label variable nb_cat "Parity categorical"
label define nb_cat 1 "1" 2 "2" 3 "3 to 5" 4 "6 or more"
label values nb_cat nb_cat

/* Maternal age in five-year bands. */

recode age (15/19=1) (20/24=2) (25/29=3) (30/34=4) (35/39=5) (40/44=6), generate(matage_cat)
label variable matage_cat "Maternal age categorical"
label define matage_cat 1 "15-19" 2 "20-24" 3 "25-29" 4 "30-34" 5 "35-39" 6 "40-44"
label values matage_cat matage_cat

/* Distance to facility: */
/* urban is the complement of rur; urban households are given a distance of 0. */
/* Distances of 99 and above are treated as missing in the categorical version. */

gen urban = 1 if rur == 0
replace urban = 0 if rur == 1
replace distfac=0 if urban==1
recode distfac (min/4=1) (5/9=2) (10/19=3) (20/98=4) (99/max=.), generate(distfac_cat)
label variable distfac_cat "Distance to facility categorical (kms)"
label define distfac_cat 1 "0-4" 2 "5-9" 3 "10-19" 4 "20+"
label values distfac_cat distfac_cat
tab distfac_cat urban

/* Combined distance/urban variable */
/* 1 for urban households; for all others, 2-5 follow the four distance-to-facility categories. */

gen res_cat = .
label var res_cat "Residence Category"
replace res_cat = 1 if (urban == 1)
replace res_cat = 2 if (urban ~= 1)&(distfac_cat == 1)
replace res_cat = 3 if (urban ~= 1)&(distfac_cat == 2)
replace res_cat = 4 if (urban ~= 1)&(distfac_cat == 3)
replace res_cat = 5 if (urban ~= 1)&(distfac_cat == 4)
label define res_cat 1 "Urban" 2 "Rural, 0-4 km from facility" 3 "Rural, 5-9 km from facility" 4 "Rural, 10-19 km from facility" 5 "Rural, 20 km or more from facility"
/* Attach the value label to the variable, as is done for the other categorical variables in this file. */
label values res_cat res_cat

/* Birth interval: a missing interval becomes its own category, 4 = Unknown. */

recode birth_interval (. = 4)
label define birth_interval 4 "Unknown", add

/* Perinatal mortality (still births plus early neonatal mortality): 1 for a still birth, otherwise the value of enm. */

gen pnm = cond(sb == 1, 1, enm)

/* Years of birth: anything after 2009 becomes missing, 1998 is folded into 1999, and 2009 into 2008. */

replace yearbirth = . if yearbirth > 2009
replace yearbirth = 1999 if yearbirth == 1998
replace yearbirth = 2008 if yearbirth == 2009

/* CMC age: a negative age in months is set to missing. */

replace cmcage = . if cmcage < 0

/* District-level wealth indicators. */

//DIST_mean_hh_p_income3 and DIST_sd_hh_p_income3 do not exist. Create them from hh_p_income
/* District mean and standard deviation of household permanent income. */
bysort district_wncode_dlhs3: egen DIST_mean_hh_p_income3 = mean(hh_p_income)
bysort district_wncode_dlhs3: egen DIST_sd_hh_p_income3 = sd(hh_p_income)

/* Copies under the names used later, filled in for DLHS 3 rows. */
gen dist_mean_pi = DIST_mean_hh_p_income3 if dlhs == 3

gen dist_sd_pi = DIST_sd_hh_p_income3 if dlhs == 3

**Our code: the same two district summaries, built from fac1_1 instead
bysort district_wncode_dlhs3: egen dist_mean_pi2 = mean(fac1_1)
bysort district_wncode_dlhs3: egen dist_sd_pi2 = sd(fac1_1)


/* District merge code, then district JSY coverage (mean of jsy within merge code and round). */

egen district_merge_code = group(district_merge)
label variable district_merge_code "District merge code"

bysort district_merge_code dlhs: egen dist_jsy = mean(jsy)

/* JSY split by place of delivery; both are defined only when jsy and ifb are known. */
/* jsy_infac is 1 for JSY with an in-facility birth, jsy_outfac is 1 for JSY with a birth outside a facility. */
gen jsy_infac = (jsy == 1 & ifb == 1) if jsy != . & ifb != .
gen jsy_outfac = (jsy == 1 & ifb == 0) if jsy != . & ifb != .

/* Execution stops here while pause is on; it has to be resumed interactively. */
pause

** lps
/* caseidno is created as an empty placeholder. */
gen caseidno = .
*keep villageno psuno distfac distgmh distcrh distogf distnthc distpf distpn  ijsy bjsy dlhs3_country_quintile dlhs3_country_decile  urban age_years education hv134 hhwt shhwt ihhwt hheduc_cat DIST_mean_hh_p_income3 DIST_sd_hh_p_income3 DIST_percent_in_lowest_quintile3 DIST_change_PI DIST_change_poor DIST_change_inequality tehsilno householdno caseidno caste religion obs iweight sweight dweight dlhs womenno age cmcinterview surveyyear cmclastbirth monthsagolastbirth yearlastbirth livebirth hasbirth nb yrsed jsy anc1 anc3 ifb gmh crh ogf nthc pf pn sba csec ppc bf b08_ b09_ cmcbirth yearbirth cmcage birth_index mult_birth alive died enm nnm imr sb birth_interval state state_code_dlhs1 district_merge district_hhcode_dlhs3 district_wncode_dlhs3 district_hhcode_dlhs2 district_hhcode_dlhs1 state_code_dlhs3 state_cat small_state mateduc_cat nb_cat matage_cat distfac_cat pnm dist_mean_pi dist_sd_pi district_merge_code dist_jsy jsy_infac jsy_outfac  bpl_card health_insurance dm_coef sm_coef res_cat dm_w_coef alt_sba v231 v226e state_name district_name  

** Debugging stop: because pause is switched on earlier in the file, execution halts here until it is resumed interactively.
pause

*order district_merge district_merge_code district_name state_cat small_state state state_name state_code_dlhs1 state_code_dlhs2 state_code_dlhs3 district_hhcode_dlhs1 district_hhcode_dlhs2 district_hhcode_dlhs3 district_wncode_dlhs2 district_wncode_dlhs3 tehsilno psuno villageno householdno caseidno womenno obs cmcinterview surveyyear dlhs iweight sweight dweight dm_coef dm_w_coef ihhwt shhwt hhwt DIST_mean_hh_p_income2 DIST_sd_hh_p_income2 DIST_percent_in_lowest_quintile2 DIST_mean_hh_p_income3 DIST_sd_hh_p_income3 DIST_percent_in_lowest_quintile3 DIST_change_PI DIST_change_poor DIST_change_inequality dist_mean_pi dist_sd_pi dist_jsy distfac distfac_cat res_cat yrfac distgmh yrgmh distcrh yrcrh distogf yrogf distnthc yrnthc distpf yrpf distpn yrpn mcever nomc ijsy bjsy hh_p_income dlhs3_country_quintile dlhs3_country_decile dlhs2_country_quintile dlhs2_country_decile full_country_quintile full_country_decile bpl_card health_insurance urban age_years literate education hv134 hheduc_cat caste religion age cmclastbirth monthsagolastbirth yearlastbirth livebirth hasbirth nb yrsed mateduc_cat nb_cat matage_cat cmcbirth yearbirth cmcage birth_index mult_birth birth_interval alive died jsy jsy_infac jsy_outfac anc1 anc3 anc4 ifb gmh crh ogf nthc pf pn sba alt_sba v231 v226e csec ppc bf sb pnm enm nnm imr b08_ b09_          

** pause

/* Re-apply the variable labels for the age-at-death pair before saving. */
label variable b08_ "Units of age at death"
label variable b09_ "Age at death"

compress

/* Final analysis file: one row per mother, holding her most recent birth and all merged covariates. */
save "F:\Natalie\PhD Health Policy\Classes\Gov 2001\Replication paper\Project\Data\Births Datasets\DLHS3 Births.dta", replace

/* The End */







